<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LLM Katan Multi-Instance Demo</title>
    <script src="https://unpkg.com/typeit@8.8.0/dist/index.umd.js"></script>
    <style>
        /* Page-wide dark "editor" theme with a monospace font stack. */
        body {
            background: #1e1e1e;
            color: #d4d4d4;
            font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
            margin: 0;
            padding: 20px;
            font-size: 14px;
            line-height: 1.4;
        }

        /* Two-column grid: terminals 1 and 2 stacked on the left,
           terminal 3 spanning both rows on the right. */
        .terminal-container {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 20px;
            max-width: 1400px;
            margin: 0 auto;
            grid-template-areas:
                "terminal1 terminal3"
                "terminal2 terminal3";
        }

        /* Shared frame for every simulated terminal window. */
        .terminal {
            background: #1e1e1e;
            border: 1px solid #333;
            border-radius: 8px;
            padding: 15px;
            min-height: 300px;
            position: relative;
        }

        /* Title bar shown at the top of each terminal. */
        .terminal-header {
            color: #569cd6;
            font-weight: bold;
            margin-bottom: 10px;
            padding-bottom: 5px;
            border-bottom: 1px solid #333;
        }

        /* Typed-output containers (the elements the script types into).
           pre-wrap keeps runs of literal spaces in the typed strings, such as
           the indent on shell continuation lines and the JSON indentation,
           which default white-space handling would collapse, while still
           letting long lines wrap. overflow-wrap lets long unbroken tokens
           (URLs) break instead of overflowing the terminal frame. */
        #terminal1,
        #terminal2,
        #terminal3 {
            white-space: pre-wrap;
            overflow-wrap: anywhere;
        }

        /* Colour roles used by the spans inside the typed output. */
        .prompt {
            color: #4ec9b0;
        }

        .command {
            color: #ce9178;
        }

        .output {
            color: #d4d4d4;
        }

        .success {
            color: #4fc1e9;
        }

        /* Page heading and sub-heading. */
        .title {
            text-align: center;
            color: #569cd6;
            font-size: 24px;
            margin-bottom: 30px;
        }

        .description {
            text-align: center;
            color: #9cdcfe;
            margin-bottom: 30px;
            font-size: 16px;
        }

        /* Grid placement of the individual terminals. */
        .terminal1 {
            grid-area: terminal1;
        }

        .terminal2 {
            grid-area: terminal2;
        }

        /* Terminal 3 shows the longest output, so it uses a smaller,
           tighter type size and a taller box. */
        .terminal3 {
            grid-area: terminal3;
            min-height: 500px;
            max-height: 600px;
            font-size: 12px;
            line-height: 1.2;
        }

        /* Narrow viewports: stack the terminals in a single column and let
           terminal 3 grow with its (now wrapping) content instead of
           spilling past a fixed max-height. */
        @media (max-width: 900px) {
            .terminal-container {
                grid-template-columns: 1fr;
                grid-template-areas:
                    "terminal1"
                    "terminal2"
                    "terminal3";
            }

            .terminal3 {
                max-height: none;
            }
        }
    </style>
</head>
<body>
    <!-- Page heading and one-line summary of what the demo shows. -->
    <div class="title">🚀 LLM Katan Multi-Instance Demo</div>
    <div class="description">Multi-instance setup + Enhanced OpenAI API compatibility showcase</div>

    <!-- Grid of three simulated terminals. The terminal1/2/3 classes map each
         box to its named grid area in the stylesheet; the empty inner divs
         (#terminal1, #terminal2, #terminal3) are the targets the inline
         script passes to TypeIt, so their ids must stay in sync with it. -->
    <div class="terminal-container">
        <!-- Left column, top: first server instance. -->
        <div class="terminal terminal1">
            <div class="terminal-header">Terminal 1: GPT-3.5-Turbo Instance</div>
            <div id="terminal1"></div>
        </div>

        <!-- Left column, bottom: second server instance. -->
        <div class="terminal terminal2">
            <div class="terminal-header">Terminal 2: Claude-3-Haiku Instance</div>
            <div id="terminal2"></div>
        </div>

        <!-- Right column, spanning both rows: client requests against both servers. -->
        <div class="terminal terminal3">
            <div class="terminal-header">Terminal 3: Testing Enhanced OpenAI Compatibility</div>
            <div id="terminal3"></div>
        </div>
    </div>

    <script>
        // Drives the three simulated terminals with TypeIt.
        //   - Terminal 1 starts typing immediately.
        //   - Terminal 2 starts after a fixed stagger.
        //   - Terminal 3 starts only once BOTH server terminals have finished
        //     typing, signalled through TypeIt's afterComplete callback. A fixed
        //     8.5 s timeout is too short: terminal 1 alone types roughly 280
        //     characters at 50 ms each plus pauses, so "Both servers are now
        //     running!" would appear while the servers were still starting.
        // Wrapped in an IIFE so the helpers do not leak into the global scope.
        (() => {
            const SERVER_TYPING_SPEED = 50;      // ms per character, terminals 1 and 2
            const CLIENT_TYPING_SPEED = 17;      // ms per character, terminal 3
            const TERMINAL_2_START_DELAY = 3000; // ms after page script runs

            // Lines of the mock chat-completion response shown in terminal 3.
            // Leading spaces are the JSON indentation and are typed verbatim.
            const CHAT_COMPLETION_RESPONSE = [
                '{',
                '  "id": "cmpl-mock-1734567890",',
                '  "object": "chat.completion",',
                '  "created": 1734567890,',
                '  "model": "gpt-3.5-turbo",',
                '  "system_fingerprint": "llm-katan-v0.1.8",',
                '  "choices": [{',
                '    "index": 0,',
                '    "message": {"role": "assistant", "content": "Hello! How can I help?"},',
                '    "finish_reason": "stop",',
                '    "logprobs": null',
                '  }],',
                '  "usage": {',
                '    "prompt_tokens": 12,',
                '    "completion_tokens": 8,',
                '    "total_tokens": 20,',
                '    "prompt_tokens_details": {"cached_tokens": 0},',
                '    "completion_tokens_details": {"reasoning_tokens": 0}',
                '  },',
                '  "token_usage": {',
                '    "prompt_tokens": 12, "completion_tokens": 8, "total_tokens": 20',
                '  }',
                '}'
            ];

            // Markup helpers: wrap text in the span classes styled by the page CSS.
            const span = (cls, text) => `<span class="${cls}">${text}</span>`;
            const command = (text) => span('command', text);
            const output = (text) => span('output', text);
            const success = (text) => span('success', text);
            // A shell prompt ("$") followed by the command typed after it.
            const prompt = (text) => `${span('prompt', '$')} ${command(text)}`;

            // Number of server terminals (1 and 2) that have not finished typing.
            let serversStillTyping = 2;

            // afterComplete handler for terminals 1 and 2: starts terminal 3
            // when the last of the two finishes.
            function onServerTerminalDone() {
                serversStillTyping -= 1;
                if (serversStillTyping === 0) {
                    startClientTerminal();
                }
            }

            // Creates a TypeIt instance on `selector` with the shared options.
            // `afterComplete` is optional and is only set when provided.
            function newTerminal(selector, speed, afterComplete) {
                const options = { speed, waitUntilVisible: true };
                if (afterComplete) {
                    options.afterComplete = afterComplete;
                }
                return new TypeIt(selector, options);
            }

            // Queues the "llm-katan ..." launch command and its startup log on
            // `terminal` for the given port and served model name. Returns the
            // instance so the caller can keep chaining; no trailing line break
            // is queued after the final log line.
            function queueServerLaunch(terminal, port, servedName) {
                return terminal
                    .type(prompt(`llm-katan --model Qwen/Qwen3-0.6B --port ${port} \\`))
                    .break()
                    .type(command(`  --served-model-name "${servedName}"`))
                    .break()
                    .pause(500)
                    .type(success('🚀 Starting LLM Katan server with model: Qwen/Qwen3-0.6B'))
                    .break()
                    .type(success(`📛 Served model name: ${servedName}`))
                    .break()
                    .type(success(`✅ Server running on http://0.0.0.0:${port}`));
            }

            // Queues a "curl .../v1/models | jq" lookup and its one-line result,
            // followed by a blank line and a one-second pause.
            function queueModelLookup(terminal, port, servedName) {
                return terminal
                    .type(prompt(`curl http://localhost:${port}/v1/models | jq '.data[0].id'`))
                    .break()
                    .type(output(`"${servedName}"`))
                    .break()
                    .break()
                    .pause(1000);
            }

            // Terminal 3: queries both endpoints, then shows a full
            // chat-completion round trip. Called once both servers are "up".
            function startClientTerminal() {
                let terminal = newTerminal('#terminal3', CLIENT_TYPING_SPEED)
                    .type(success("# Both servers are now running! Let's test enhanced OpenAI compatibility..."))
                    .break()
                    .break()
                    .pause(1000);

                terminal = queueModelLookup(terminal, 8000, 'gpt-3.5-turbo');
                terminal = queueModelLookup(terminal, 8001, 'claude-3-haiku');

                terminal = terminal
                    .type(success('# Testing full OpenAI-compatible response'))
                    .break()
                    .type(prompt('curl -X POST http://localhost:8000/v1/chat/completions \\'))
                    .break()
                    .type(command('  -H "Content-Type: application/json" \\'))
                    .break()
                    .type(command(`  -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hi!"}]}'`))
                    .break()
                    .pause(1000);

                // One typed line plus a line break per response line.
                terminal = CHAT_COMPLETION_RESPONSE.reduce(
                    (queued, line) => queued.type(output(line)).break(),
                    terminal
                );

                terminal
                    .pause(1000)
                    .type(success('# ✨ Enhanced compatibility with all OpenAI SDK fields!'))
                    .break()
                    .type(success('# 🎯 Same tiny model, multiple providers, full API support'))
                    .go();
            }

            // Terminal 1: install the package, then launch the GPT-3.5-Turbo instance.
            queueServerLaunch(
                newTerminal('#terminal1', SERVER_TYPING_SPEED, onServerTerminalDone)
                    .type(prompt('pip install llm-katan'))
                    .break()
                    .type(output('Successfully installed llm-katan-0.1.8'))
                    .break()
                    .break()
                    .pause(1000),
                8000,
                'gpt-3.5-turbo'
            ).go();

            // Terminal 2: launch the Claude-3-Haiku instance (delayed start).
            setTimeout(() => {
                queueServerLaunch(
                    newTerminal('#terminal2', SERVER_TYPING_SPEED, onServerTerminalDone),
                    8001,
                    'claude-3-haiku'
                ).go();
            }, TERMINAL_2_START_DELAY);
        })();
    </script>
</body>
</html>
